

require_package <- function(package){
  if(!is.element(package, .packages(all.available = TRUE))){
    try(install.packages(package, repos="http://cran.rstudio.com"), silent = TRUE)
  }
  suppressPackageStartupMessages(library(package,character.only=T, quietly = TRUE))  
}

res <- lapply(c('tidyverse', 'haven', 'sjlabelled', 'reldist', 'ineq', 'gglorenz', 'GB2', 'GB2group', 'VGAM', 'ggpubr', 'gganimate', 'countrycode', 'data.table', 'stargazer', 'ggthemes', 'ggrepel'), require_package)



#DATA FOLDER - PLEASE REPLACE THE PATH

############# NIGERIA ########################
data_nigeria<- read_dta("NGA_wave3_4_covid_merged.dta")
#show labels
# print variable labels 
variable_names <- data_nigeria %>% sjlabelled::get_label() %>% enframe()

#now add also income data
data_nigeria_income_v2 <- read_dta("NGA_wave_3_4_income.dta")

#household panel (not individual!)
data_nigeria_hh <- data_nigeria %>% group_by(hhid) %>% dplyr::summarize(sex=sex[indiv==1], hhmembers=n(), wt_baseline=mean(wt_baseline, na.rm = T), s9q27=sum(s9q27, na.rm=T)) %>% ungroup()

#total income
#national poverty line 137400 Nira (roughly 361 U.S. dollars) 
data_nigeria_hh %>% filter(!is.na(wt_baseline)) %>% dplyr::summarize(poverty=weighted.mean(s9q27<137400, w = wt_baseline*hhmembers, na.rm = T))

#income: s9q27 
#since march has it increases 1, same, 2, decreased 3   (s7q2)
percent_chg = 0.296 #assume 33% increases/decrease

data_nigeria_inequality <- data_nigeria %>% mutate(income_before=s9q27, income_after=s9q27 * (1 + (s7q2-2)* (-1)* (percent_chg))) %>% select(income_before, income_after, s7q2, wt_baseline)
#Gini
data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(gini_cons_feb2020=gini(income_before, weights = wt_baseline), gini_cons_mar2020=gini(income_after, weights = wt_baseline))

#now with updated data
data_nigeria_income_v2 <- data_nigeria_income_v2 %>% mutate(income_before=wave4_s9q27, income_after=wave4_s9q27 * (1 + (s7q2-2)* (-1)* (percent_chg))) %>% select(income_before, income_after, s7q2, wt_baseline)
#Gini
data_nigeria_income_v2 %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(gini_cons_feb2020=gini(income_before, weights = wt_baseline), gini_cons_mar2020=gini(income_after, weights = wt_baseline))
data_nigeria_inequality <- data_nigeria_income_v2

lc_nigeria <- ggplot(data_nigeria_inequality) + stat_lorenz(aes(income_before, color="February 2020")) + stat_lorenz(aes(income_after, color="March 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("Nigeria, Income") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"), limits = c("February 2020", "March 2020", "April 2020"))

ggplot(data_nigeria_inequality %>% arrange(income_before) %>% filter(!is.na(income_before) & income_before>0) %>% mutate(order=1:n()) %>% mutate(s7q2_filled=ifelse(is.na(s7q2), 2, s7q2)) %>% filter(!is.na(s7q2))) + geom_point(aes(order, income_before, color=as.character(s7q2))) + scale_color_discrete(labels = c("2"="unchanged", "1"="increased", "3"="decreased"), name="affected")


################# SOUTH AFRICA ####################
data_zaf <- read_dta("SA_inc_change_demog.dta")
#show labels
# print variable labels 
variable_names_zaf <- data_zaf %>% sjlabelled::get_label() %>% enframe()

# Gender 
# Men 1
# Women 2
# 
# Population group
# African/Black 1
# Coloured 2
# Asian/Indian 3
# White 4
# income in Rand per month

#inequality measures
data_zaf  %>% dplyr::summarize(gini_feb2020=gini(w1_nc_eminc_feb, weights = w1_nc_wgt), gini_apr2020=gini(w1_nc_eminc_apr, weights = w1_nc_wgt))
#Gini
lc_zaf_income <- ggplot(data_zaf) + stat_lorenz(aes(w1_nc_eminc_feb, color="February 2020")) + stat_lorenz(aes(w1_nc_eminc_apr, color="April 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("South Africa, Income") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"), limits = c("February 2020", "March 2020", "April 2020"))

#Probit of loss
#summary(glm("loss ~ w1_nc_best_age_yrs + w1_nc_best_race + w1_nc_emwrk_isco_c + w1_nc_edter + w1_nc_edschgrd", data = data_zaf %>% mutate(loss=ifelse(inc_change<0,1,0)), family = binomial(link = "probit")))

################# INDIA ####################
data_india <- read_dta("India_inc_change_demog.dta")
#show labels
# print variable labels 
data_india %>% sjlabelled::get_label() %>% enframe()

data_india <- data_india%>% filter(!is.na(post_lock_wage_r1))

#inequality measures

#income
data_india  %>% dplyr::summarize(gini_wage_feb2020=gini(pre_lock_wage, weights = weight_raked), gini_wage_mar2020=gini(post_lock_wage_r1, weights = weight_raked))
#Gini
lc_india_income <- ggplot(data_india) + stat_lorenz(aes(pre_lock_wage, color="February 2020")) + stat_lorenz(aes(post_lock_wage_r1, color="April 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("India, Income") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"))

#consumption
data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(gini_cons_feb2020=gini(con_wk_feb, weights = weight_raked), gini_cons_mar2020=gini(con_wk_mar, weights = weight_raked))
#Gini
lc_india_consumption <- ggplot(data_india) + stat_lorenz(aes(con_wk_feb, color="February 2020")) + stat_lorenz(aes(con_wk_mar, color="March 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("India, Consumption") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"))

################# KENYA ####################
data_ken <- read_dta("Kenya_inc_change_demog.dta")
#show labels
# print variable labels 
variable_names_ken <- data_ken %>% sjlabelled::get_label() %>% enframe()
data_ken <- data_ken %>% filter(!is.na(pre_lock_income) & !is.na(post_lock_income)& !is.na(weight))

#inequality measures
data_ken  %>% dplyr::summarize(gini_feb2020=gini(pre_lock_income  , weights = weight), gini_apr2020=gini(post_lock_income , weights = weight))
#Gini
lc_ken_income <- ggplot(data_ken) + stat_lorenz(aes(pre_lock_income, color="February 2020")) + stat_lorenz(aes(post_lock_income, color="April 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("Kenia, Income") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"), limits = c("February 2020", "March 2020", "April 2020"))

################# UGANDA ####################
data_ugd <- read_dta("Uganda_inc_change_demog.dta")
#show labels
# print variable labels 
variable_names_ugd <- data_ugd %>% sjlabelled::get_label() %>% enframe()
data_ugd <- data_ugd %>% filter(!is.na(pre_lock_income) & !is.na(post_lock_income)& !is.na(weight)) %>% mutate(pre_lock_income=pmax(0,pre_lock_income), post_lock_income=pmax(0,post_lock_income))

#inequality measures
data_ugd  %>% dplyr::summarize(gini_feb2020=gini(pre_lock_income  , weights = weight), gini_apr2020=gini(post_lock_income , weights = weight))
#Gini
lc_ugd_income <- ggplot(data_ugd) + stat_lorenz(aes(pre_lock_income, color="February 2020")) + stat_lorenz(aes(post_lock_income, color="April 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("Uganda, Income") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"), limits = c("February 2020", "March 2020", "April 2020"))

################## COMBINED ANALYSIS ##########################
ggarrange(lc_zaf_income, lc_nigeria, lc_ken_income, lc_ugd_income, lc_india_income, lc_india_consumption, nrow = 2, ncol = 3, common.legend = T, legend = "bottom")
ggsave("lorenz_curves_arranged.pdf", scale=1, width = 8, height = 6)

require(xtable)

gini_matrix <- 
  data.frame(country=c("Nigeria", "South Africa", "Kenya", "Uganda", "India", "India"), measure=c("income", "income", "income", "income", "income", "consumption"), 
             "Avg. [%]" = c(               
               as.numeric(data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(diff=100*(weighted.mean(income_after, weights = wt_baseline)/weighted.mean(income_before, weights = wt_baseline)-1))),
               as.numeric(data_zaf  %>% dplyr::summarize(diff=100*(weighted.mean(w1_nc_eminc_apr, weights = w1_nc_wgt)/weighted.mean(w1_nc_eminc_feb, weights = w1_nc_wgt)-1))),
               as.numeric(data_ken  %>% dplyr::summarize(diff=100*(weighted.mean(post_lock_income, weights = weight)/weighted.mean(pre_lock_income, weights = weight)-1))),
               as.numeric(data_ugd  %>% dplyr::summarize(diff=100*(weighted.mean(post_lock_income, weights = weight)/weighted.mean(pre_lock_income, weights = weight)-1))),
               as.numeric(data_india  %>% dplyr::summarize(diff=100*(weighted.mean(post_lock_wage_r1, weights = weight_raked)/weighted.mean(pre_lock_wage, weights = weight_raked)-1))),
               as.numeric(data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(diff=100*(weighted.mean(con_wk_mar, weights = weight_raked)/weighted.mean(con_wk_feb, weights = weight_raked)-1)))
             ),
             "Gini Feb." = c(
               as.numeric(data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(gini_cons_feb2020=gini(income_before, weights = wt_baseline))),
               as.numeric(data_zaf  %>% dplyr::summarize(gini_feb2020=gini(w1_nc_eminc_feb, weights = w1_nc_wgt))),
               as.numeric(data_ken  %>% dplyr::summarize(gini_apr2020=gini(pre_lock_income, weights = weight))),
               as.numeric(data_ugd  %>% dplyr::summarize(gini_apr2020=gini(pre_lock_income, weights = weight))),
               as.numeric(data_india  %>% dplyr::summarize(gini_wage_feb2020=gini(pre_lock_wage, weights = weight_raked))),
               as.numeric(data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(gini_cons_mar2020=gini(con_wk_feb, weights = weight_raked)))
             ),  
             "Gini Apr." = c(               
               as.numeric(data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(gini_cons_mar2020=gini(income_after, weights = wt_baseline))),
               as.numeric(data_zaf  %>% dplyr::summarize(gini_apr2020=gini(w1_nc_eminc_apr, weights = w1_nc_wgt))),
               as.numeric(data_ken  %>% dplyr::summarize(gini_apr2020=gini(post_lock_income, weights = weight))),
               as.numeric(data_ugd  %>% dplyr::summarize(gini_apr2020=gini(post_lock_income, weights = weight))),
               as.numeric(data_india  %>% dplyr::summarize(gini_wage_apr2020=gini(post_lock_wage_r1, weights = weight_raked))),
               as.numeric(data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(gini_cons_mar2020=gini(con_wk_mar, weights = weight_raked)))
             )
             
             
             , check.names = FALSE)

print(gini_matrix)


#add macro data
load("dataset_merged.Rdata")
#last gini index from WIID
data_hist_gini <- dataset_merged %>% mutate(country=countrycode::countrycode(origin = "iso3c", destination = "country.name", sourcevar=iso3)) %>% select(gini_disp, gini_mkt, year, country) %>% filter(country %in% gini_matrix$country) %>% group_by(country) %>% arrange(year) %>% filter(!is.na(gini_disp)) %>% slice(n())

load("dataset_projection.Rdata")
#last gini index from IMF projection
imf_growth_2020 <- dataset_projection %>% ungroup() %>% mutate(country=countrycode::countrycode(origin = "iso3c", destination = "country.name", sourcevar=iso3)) %>% select(gdp_growth_updated_october2020, year, country) %>% filter(country %in% gini_matrix$country) %>% group_by(country) %>% filter(year==2020)

#add macro stats
gini_matrix <- gini_matrix %>% 
  #  left_join(data_hist_gini %>% mutate("Gini (net)"=paste0(gini_disp/100), "Gini (gross)"=paste0(gini_mkt/100)) %>% select(-year, -gini_disp, -gini_mkt)) %>% 
  left_join(data_hist_gini %>% mutate("SWIID"=paste0(gini_disp/100)) %>% select(-year, -gini_disp, -gini_mkt)) %>% 
  left_join(imf_growth_2020 %>% select(-year) %>% dplyr::rename("GDP chg." = gdp_growth_updated_october2020)) 

print(xtable(gini_matrix, digits = 3, label = "tab:gini", caption = "Aggregate estimates for 2020, various countries", ), file="gini_table.tex", include.rownames=FALSE, size = "small")


# trying animate graphs
#ZAF
ggplot(data_zaf) + stat_lorenz(aes(w1_nc_eminc_feb, color="February 2020")) + stat_lorenz(aes(w1_nc_eminc_apr, color="April 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(color="Month") + theme(legend.position = "bottom") + ggtitle("South Africa, Income") + scale_color_manual(values=c("February 2020" = "black", "March 2020" = "blue", "April 2020" = "green"), limits = c("February 2020", "March 2020", "April 2020"))

conversion=0.067 #ZAR to USD
lorenzcurve_scalefactor = 3600
data_animate <- data_zaf %>% arrange(w1_nc_eminc_feb) %>% mutate(id=row_number()) %>% as.data.frame()
data_animate <- zap_labels(data_animate)
data_animate <- data_animate %>% mutate(average_pre=mean(w1_nc_eminc_feb*conversion), average_post=mean(w1_nc_eminc_apr*conversion), individual_pre=w1_nc_eminc_feb*conversion, individual_post=w1_nc_eminc_apr*conversion) %>% select(id,average_pre,average_post,individual_pre,individual_post) 
#add cumsum for gini
data_animate <- data_animate %>% mutate(cumsum_pre=cumsum(individual_pre)/max(cumsum(individual_pre))*lorenzcurve_scalefactor, cumsum_post=cumsum(individual_post)/max(cumsum(individual_post))*lorenzcurve_scalefactor)
#smooth post income
#data_animate <- data_animate %>% mutate(individual_post = predict(loess(individual_post ~ id, span = .1)))
data_animate <- data_animate %>% pivot_longer(cols = average_pre:cumsum_post) 
data_animate <- data_animate %>% separate(col = name, into = c("aggregation", "period"), sep="_")
#now reorder cumsum_post to get correct post LC
data_animate <- rbind(data_animate %>% filter(!(aggregation=="cumsum" & period=="post")), data_animate %>% filter(aggregation=="individual" & period=="post") %>% arrange(value) %>% mutate(value=cumsum(value)/max(cumsum(value))*lorenzcurve_scalefactor, id=row_number(), aggregation="cumsum"))

ggplot(data_animate, aes(x=id, y=value)) + geom_smooth(aes(color=period, group=interaction(aggregation, period)), method="loess", span = 0.1, se=F) + xlab("Households") + ylab("Monthly Income [USD]") + ylim(0,max(data_animate$value)*.5) #+ scale_y_log10() 

#as individual slides
ggplot(data_animate %>% filter(aggregation=="average"), aes(x=id, y=value)) + geom_smooth(aes(color=period, group=interaction(aggregation, period)), method="loess", span = 0.1, se=F) + xlab("Households") + ylab("Monthly Income [USD]") + ylim(0,max(data_animate$value)*.5)
ggsave("ZAF_LC_constructed_1.pdf", scale=1, width = 8, height = 6)
ggplot(data_animate %>% filter(aggregation=="individual"), aes(x=id, y=value)) + geom_smooth(aes(color=period, group=interaction(aggregation, period)), method="loess", span = 0.1, se=F) + xlab("Households") + ylab("Monthly Income [USD]") + ylim(0,max(data_animate$value)*.5)
ggplot(data_animate %>% filter(aggregation=="individual"), aes(x=id, y=value)) + geom_line(aes(color=period, group=interaction(aggregation, period))) + xlab("Households") + ylab("Monthly Income [USD]") + ylim(0,max(data_animate$value)*.5)
ggsave("ZAF_LC_constructed_2.pdf", scale=1, width = 8, height = 6)
ggplot(data_animate %>% filter(aggregation=="cumsum"), aes(x=id, y=value)) + geom_smooth(aes(color=period, group=interaction(aggregation, period)), method="loess", span = 0.1, se=F) + xlab("Households") + ylab("Monthly Income [USD]") + ylim(0,max(data_animate$value)*.5)
ggsave("ZAF_LC_constructed_3.pdf", scale=1, width = 8, height = 6)

#p_anim <- ggplot(data_animate, aes(x=id, y=value)) + geom_point(aes(color=period)) + scale_y_log10()
#  transition_states(aggregation, transition_length = 1, state_length = 1, wrap = FALSE)
#animate(p_anim + enter_fade() + exit_shrink(), renderer = file_renderer(dir = "./animate/", prefix = "gganim_plot", overwrite = TRUE))
#animate(p_anim + enter_fade() + exit_shrink(), renderer = av_renderer(file="animation.mp4"))

share_dataset <- read_dta("SHARE_income.dta")

share_dataset_aggregated <- share_dataset %>% mutate(iso3=countrycode(country_name, "country.name", "iso3c")) %>% mutate(income_before=cahh017e, income_after=cae005e, weights=cchw_w8_ca) %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(weights)) %>% group_by(iso3) %>% dplyr::summarize(gini_before=100*gini(income_before, weights = weights), gini_after=100*gini(income_after, weights = weights), average_loss=(weighted.mean(income_after, w=weights)/weighted.mean(income_before, w=weights)-1)*100, nobs=n()) %>% mutate(gini_impact=gini_after-gini_before) %>% as.data.frame()
print(share_dataset_aggregated)
print(xtable(share_dataset_aggregated, digits = 2, label = "tab:share", caption = "Inequality impact of COVID-19 [SHARE dataset]", ), file="share_eu_countries.tex", include.rownames=FALSE, size = "small")
#EU average
print(share_dataset_aggregated %>% dplyr::summarize_all(.funs = mean))

#map
library("sf")
library("rnaturalearth")
library("scatterpie")
europe <- ne_countries(scale = "medium", returnclass = "sf", continent = "europe")
world_points <- st_centroid(europe)
# extract labels
world_points <- cbind(europe, st_coordinates(st_centroid(europe$geometry)))
#add my data to country map
europe <- europe %>% left_join(share_dataset_aggregated %>% dplyr::rename(iso_a3=iso3))

p_eu <- ggplot(data = europe) + geom_sf(aes(fill = gini_impact)) + xlab("") + ylab("") + ggtitle("Change in Gini due to COVID-19 [points]") + coord_sf(xlim = c(-10, 40), ylim = c(35, 72)) + labs(fill='Gini change [points]') + scale_fill_gradient2_tableau(palette = "Red-Blue Diverging", na.value = "grey50", guide = "colourbar", trans="reverse", limits = c(5, -5))
print(p_eu)
ggsave("SHARE_EU_GINI_change_MAP.pdf", width = 8, height = 6)

# SHARE EU Lorenz curves
share_dataset_lc <- share_dataset %>% mutate(iso3=countrycode(country_name, "country.name", "iso3c")) %>% mutate(income_before=cahh017e, income_after=cae005e, weights=cchw_w8_ca) %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(weights))
ggplot(share_dataset_lc) + stat_lorenz(aes(income_before, color=iso3, linetype="February 2020")) + stat_lorenz(aes(income_after, color=iso3, linetype="April 2020")) + geom_abline(color = "grey") + xlab("") + ylab("") + labs(isoe="Country", linetype="") + theme(legend.position = "bottom") + ggtitle("SHARE dataset") + scale_linetype_manual(values=c("February 2020" = "solid", "March 2020" = "dashed", "April 2020" = "dashed")) + theme(legend.position="right")
ggsave("SHARE_Lorenz_Curve.pdf", width = 8, height = 6)

#difference in Lorenz curves 
share_dataset_lc_rearranged <- share_dataset_lc %>% group_by(iso3) %>% arrange(income_before) %>% mutate(id=row_number()) %>% as.data.frame()

#without weights
share_dataset_lc_rearranged$weights <- 1

#add cumsum for gini
share_dataset_lc_rearranged <- share_dataset_lc_rearranged %>% group_by(iso3) %>% mutate(cumsum_before=cumsum(income_before*weights)/max(cumsum(income_before*weights)), idlc=id/max(id))
#now reorder cumsum_post to get correct post LC
share_dataset_lc_rearranged_post <- share_dataset_lc_rearranged %>% group_by(iso3) %>% arrange(income_after) %>% mutate(id=row_number()) %>% as.data.frame()
#add cumsum for gini
share_dataset_lc_rearranged_post <- share_dataset_lc_rearranged_post %>% group_by(iso3) %>% mutate(cumsum_after=cumsum(income_after*weights)/max(cumsum(income_after*weights)), idlc=id/max(id))

#compute difference
share_dataset_lc_combined <- share_dataset_lc_rearranged %>% select(iso3, idlc, cumsum_before) %>% full_join(share_dataset_lc_rearranged_post %>% select(iso3, idlc, cumsum_after)) %>% mutate(difference=cumsum_after-cumsum_before)

ggplot() + geom_line(data = share_dataset_lc_rearranged, aes(x=idlc, y=cumsum_before, color=iso3)) + geom_abline(slope=1, intercept=0, color="grey")  + xlim(0,1) + geom_line(data = share_dataset_lc_rearranged_post, aes(x=idlc, y=cumsum_after, color=iso3), linetype="dashed") + xlab("") + ylab("")
#Difference
ggplot() + geom_line(data = share_dataset_lc_combined, aes(x=idlc, y=difference, color=iso3))
ggplot() + geom_smooth(data = share_dataset_lc_combined, aes(x=idlc, y=difference, color=iso3), method = "loess", span = 0.2, se=F) + geom_label_repel(data = share_dataset_lc_combined %>% group_by(iso3) %>% slice(500), aes(x=idlc, y=difference, color=iso3, label=iso3), show.legend = FALSE) + xlab("") + ylab("Lorenz curve difference")

ggsave("SHARE_Gini_difference.pdf", width = 8, height = 6)

#cross country table and regression
govt_response <- read_dta("govt_response.dta")
govt_response <- govt_response %>% mutate(iso3=country_code) %>% group_by(iso3) %>% dplyr::summarize(policy_stringency=max(value[bdate<="2020-3-31"]))
#downloaded from https://covid19.who.int/WHO-COVID-19-global-data.csv
dataset_cases <- fread("https://covid19.who.int/WHO-COVID-19-global-data.csv")
dataset_cases <- dataset_cases %>% mutate(iso3=countrycode::countrycode(sourcevar = Country_code, origin = "iso2c", destination = "iso3c")) %>% group_by(iso3) %>% filter(Date_reported<="2020-3-31") %>% dplyr::summarize(cases=sum(New_cases), deaths=sum(New_deaths)) %>% ungroup() %>% select(iso3, cases, deaths) 

gini_matrix_all <- 
  data.frame(country=c("NGA", "ZAF", "KEN", "UGA", "IND"), 
             "Avg. [%]" = c(               
               as.numeric(data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(diff=100*(weighted.mean(income_after, weights = wt_baseline)/weighted.mean(income_before, weights = wt_baseline)-1))),
               as.numeric(data_zaf  %>% dplyr::summarize(diff=100*(weighted.mean(w1_nc_eminc_apr, weights = w1_nc_wgt)/weighted.mean(w1_nc_eminc_feb, weights = w1_nc_wgt)-1))),
               as.numeric(data_ken  %>% dplyr::summarize(diff=100*(weighted.mean(post_lock_income, weights = weight)/weighted.mean(pre_lock_income, weights = weight)-1))),
               as.numeric(data_ugd  %>% dplyr::summarize(diff=100*(weighted.mean(post_lock_income, weights = weight)/weighted.mean(pre_lock_income, weights = weight)-1))),
               as.numeric(data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(diff=100*(weighted.mean(con_wk_mar, weights = weight_raked)/weighted.mean(con_wk_feb, weights = weight_raked)-1)))
             ),
             "Gini Feb." = c(
               as.numeric(data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(gini_cons_feb2020=gini(income_before, weights = wt_baseline))),
               as.numeric(data_zaf  %>% dplyr::summarize(gini_feb2020=gini(w1_nc_eminc_feb, weights = w1_nc_wgt))),
               as.numeric(data_ken  %>% dplyr::summarize(gini_apr2020=gini(pre_lock_income, weights = weight))),
               as.numeric(data_ugd  %>% dplyr::summarize(gini_apr2020=gini(pre_lock_income, weights = weight))),
               as.numeric(data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(gini_cons_mar2020=gini(con_wk_feb, weights = weight_raked)))
             ),  
             "Gini Apr." = c(               
               as.numeric(data_nigeria_inequality  %>% filter(!is.na(income_before) & !is.na(income_after) & !is.na(wt_baseline)) %>% dplyr::summarize(gini_cons_mar2020=gini(income_after, weights = wt_baseline))),
               as.numeric(data_zaf  %>% dplyr::summarize(gini_apr2020=gini(w1_nc_eminc_apr, weights = w1_nc_wgt))),
               as.numeric(data_ken  %>% dplyr::summarize(gini_apr2020=gini(post_lock_income, weights = weight))),
               as.numeric(data_ugd  %>% dplyr::summarize(gini_apr2020=gini(post_lock_income, weights = weight))),
               as.numeric(data_india  %>% filter(!is.na(con_wk_mar) & !is.na(con_wk_feb)) %>% dplyr::summarize(gini_cons_mar2020=gini(con_wk_mar, weights = weight_raked)))
             )
             , check.names = FALSE)

print(gini_matrix_all)

#
country_dataset <- rbind(gini_matrix_all %>% mutate(`Gini Feb.`=100*`Gini Feb.`, `Gini Apr.`=100*`Gini Apr.`) %>% dplyr::rename(average_loss=`Avg. [%]`, gini_before=`Gini Feb.`, gini_after=`Gini Apr.`, iso3=country), share_dataset_aggregated %>% select(iso3, average_loss, gini_before, gini_after))

#add macro data
load("dataset_merged.Rdata")
#last gini index from WIID
data_hist_gini <- dataset_merged %>% select(gini_disp, gini_mkt, year, iso3) %>% filter(iso3 %in% country_dataset$iso3) %>% group_by(iso3) %>% arrange(year) %>% filter(!is.na(gini_disp)) %>% slice(n())

load("dataset_projection.Rdata")
#last gini index from IMF projection
imf_growth_2020 <- dataset_projection %>% ungroup() %>% select(gdp_growth_updated_october2020, population, year, iso3) %>% filter(iso3 %in% country_dataset$iso3) %>% group_by(iso3) %>% filter(year==2020)

#add macro stats
country_dataset <- country_dataset %>% 
  #  left_join(data_hist_gini %>% mutate("Gini (net)"=paste0(gini_disp/100), "Gini (gross)"=paste0(gini_mkt/100)) %>% select(-year, -gini_disp, -gini_mkt)) %>% 
  left_join(data_hist_gini %>% mutate("SWIID"=paste0(gini_disp/100)) %>% select(-year, -gini_disp, -gini_mkt)) %>% 
  left_join(imf_growth_2020 %>% select(-year) %>% dplyr::rename("GDP chg." = gdp_growth_updated_october2020)) %>% 
  left_join(dataset_cases) %>%
  left_join(govt_response) 

country_dataset <- country_dataset %>% mutate(gini_growth=100*I(gini_after/gini_before-1), gini_diff=gini_after-gini_before, cases_per_1000=cases/population*1e-6*1000)

print(xtable(rbind(country_dataset %>% select(iso3, average_loss, gini_before, gini_after, gini_diff, `GDP chg.`), country_dataset %>% select(iso3, average_loss, gini_before, gini_after, gini_diff, `GDP chg.`) %>% summarize_all(.funs = mean) %>% mutate(iso3="Average")), digits = 3, label = "tab:gini", caption = "Aggregate estimates for 2020, various countries", ), file="gini_table_all.tex", include.rownames=FALSE, size = "small")

require(GGally)
ggpairs(country_dataset %>% select(gini_growth, `GDP chg.`, cases_per_1000, policy_stringency))

summary(lm(formula = "100*I(gini_after/gini_before-1) ~ `GDP chg.` + policy_stringency", data = country_dataset))

stargazer(lm(formula = "100*I(gini_after/gini_before-1) ~ `GDP chg.`", data = country_dataset), lm(formula = "100*I(gini_after/gini_before-1) ~ `GDP chg.` + policy_stringency", data = country_dataset))
